#!/usr/bin/python3
##################################################
'''String parsing'''
# Copyright (c) 2022 Xu Ruijun | 1687701765@qq.com
# the file is MIT License
##################################################

__author__ = "Xu Ruijun"
__copyright__ = "Copyright (c) 2022 Xu Ruijun"
# NOTE(review): the header comment at the top of this file states the MIT
# License, which disagrees with the value below -- confirm which license
# actually applies.
__license__ = "GPLv3 or later"


# Maps a single SI-prefix character to its exponent as a power of 1000
# (the multiplier is 1000 ** value, e.g. 'k' -> 1000 ** 1, 'm' -> 1000 ** -1).
prefix_dict = {
    # Standard SI prefix symbols.
    'f': -5, 'p': -4, 'n': -3, 'm': -1,
    'k': 1, 'M': 2, 'G': 3, 'T': 4, 'P': 5,
    # Micro: two visually identical code points exist; accept both so the
    # result does not depend on which one the input source happened to emit.
    '\u03bc': -2,  # GREEK SMALL LETTER MU
    '\u00b5': -2,  # MICRO SIGN
    # Non-standard ASCII / letter-case aliases that are accepted as well.
    'N': -3,
    'u': -2, 'U': -2,
    'K': 1,
    'g': 3,
}

def parser_SInum(s):
    '''
    Parse a number that may carry one SI prefix character and return a float.

    The prefix (any key of ``prefix_dict``) may appear in two positions:

    * at the end, where it scales the preceding number:
      ``'1.5k'`` -> ``1500.0``
    * anywhere else, where it also stands in for the decimal point:
      ``'1k5'`` -> ``1500.0``

    A string without any prefix character is passed straight to ``float``.

    Raises ValueError if more than one prefix character is present, if the
    prefix is used as a decimal point while the string also contains '.',
    or if the remaining text is not accepted by ``float``.
    '''
    found = set(s) & prefix_dict.keys()
    if not found:
        return float(s)

    # Validation raises explicitly instead of using ``assert`` so that it is
    # still enforced when Python runs with -O.
    if len(found) != 1:
        raise ValueError(f'more than one kind of SI prefix in {s!r}')
    (ch,) = found
    if s.count(ch) != 1:
        raise ValueError(f'SI prefix {ch!r} occurs more than once in {s!r}')

    coef = 1000 ** prefix_dict[ch]
    if s.endswith(ch):
        # Trailing prefix: plain number followed by the multiplier.
        return float(s[:-1]) * coef

    # Embedded prefix: it replaces the decimal point, so a second '.' would
    # make the number ambiguous.
    if '.' in s:
        raise ValueError(
            f'SI prefix {ch!r} used as decimal point but {s!r} also has a dot')
    return float(s.replace(ch, '.')) * coef

def parser_SInum_unit(s, units=()):
    '''
    Split a quantity string into its numeric value and its unit text.

    s     -- the text to parse, e.g. '12.34 km/s'
    units -- iterable of known unit strings, e.g. ['m', 'mol']; used to
             decide whether the first character after the number is an SI
             prefix or already the start of the unit

    Returns a tuple ``(value, unit)``: ``value`` is whatever ``parser_SInum``
    returns for the numeric part, ``unit`` is the remaining text ('' when
    nothing but whitespace follows the number).

    With the default ``units``, a prefix character directly after the number
    is always taken as a prefix: '1km' is split into '1k' and 'm'.  With
    ``units=['m']``, '1m' is split into '1' and 'm', while '1mm' is split
    into '1m' and 'm'.

    Raises ValueError if ``s`` contains no decimal digit.  Errors raised by
    ``parser_SInum`` propagate unchanged.
    '''
    # Layout, e.g. for s = '12.34  km/s':
    #   s[:di] == '12.34'   di is just past the last decimal digit
    #                       (and past a '.' directly following it)
    #   s[ui]  == 'k'       ui is the first non-space character after that

    # Scan backwards for the last decimal digit; an empty string or a string
    # without any digit ends with di == 0.
    di = len(s)
    while di > 0 and not s[di - 1].isdecimal():
        di -= 1
    if di == 0:
        raise ValueError(f'no decimal character in {str(s)}')

    if di < len(s) and s[di] == '.':
        di += 1  # a trailing decimal point belongs to the number

    ui = di
    while ui < len(s) and s[ui].isspace():
        ui += 1
    if ui >= len(s):
        # Only whitespace (or nothing) follows the number: there is no unit.
        return parser_SInum(s[:di]), ''

    if s[ui] in prefix_dict:
        next_is_alpha = ui + 1 < len(s) and s[ui + 1].isalpha()
        for u in units:
            if not s.startswith(u, ui):
                continue
            if len(u) >= 2:
                break  # multi-character unit, example: '1mol'
            if len(u) == 1 and not next_is_alpha:
                break  # one-letter unit standing alone, example: '1m', '1m/s'
        else:
            # No known unit starts here: the character is an SI prefix.
            return parser_SInum(s[:ui + 1]), s[ui + 1:].lstrip()
    return parser_SInum(s[:ui]), s[ui:]  # no SI prefix before the unit
